# Load the raw renewables consumption/production table from the project's
# data folder and standardise its column names with clean_names().
us_renew <- clean_names(
  read_csv(here("data", "renewables_cons_prod.csv"))
)
# Keep only the consumption series, excluding the "total" rows.
# Descriptions are lower-cased first so the pattern matching is
# case-insensitive; the leading `!` negates the "total" match.
renew_clean <- us_renew %>%
  mutate(description = str_to_lower(description)) %>%
  filter(
    str_detect(description, pattern = "consumption"),
    !str_detect(description, pattern = "total")
  )
# Build proper date columns and a numeric value column.
#   yr_mo_day: date-time parsed from yyyymm (rows with month = 13 parse to NA)
#   month_sep: the same date expressed as a tsibble year-month
#   value:     coerced to numeric
# Rows where the month or the value is missing are then dropped.
renew_date <- renew_clean %>%
  mutate(
    yr_mo_day = lubridate::parse_date_time(yyyymm, "ym"),
    month_sep = tsibble::yearmonth(yr_mo_day),
    value = as.numeric(value)
  ) %>%
  drop_na(month_sep, value)
# Version of the data with month and year split into separate columns.
# month() with label = TRUE returns the month as a labelled factor;
# year() extracts the calendar year.
renew_parsed <- mutate(
  renew_date,
  month = month(yr_mo_day, label = TRUE),
  year = year(yr_mo_day)
)
# Line chart of consumption over time, one coloured line per energy source.
renew_gg <- renew_date %>%
  ggplot(mapping = aes(x = month_sep, y = value, group = description)) +
  geom_line(mapping = aes(color = description))
renew_gg
# This is a discrete palette: the palette you pick must contain enough colors for every series.
# Updating the line colors with a paletteer palette:
# Re-draw the plot with a discrete paletteer colour scale.
renew_gg + scale_color_paletteer_d(palette = "palettetown::seadra")
# Convert to a tsibble: one series per description, indexed by year-month.
renew_ts <- renew_parsed %>%
  as_tsibble(index = month_sep, key = description)
# Let's look at our time-series data in a couple of different ways:
# Default time-series plot of every source.
autoplot(renew_ts, value)
# Subseries plot: splits each source by month so the same month can be
# compared across years.
gg_subseries(renew_ts, value)
# gg_season() did not work here; the seasonal plot is built by hand with
# ggplot below.
# renew_ts %>% gg_season(value)
# Hand-made season plot: one line per year across the months, with one
# facet per source stacked in a single column and strip labels on the right.
renew_parsed %>%
  ggplot(mapping = aes(x = month, y = value, group = year)) +
  geom_line(mapping = aes(color = year)) +
  facet_wrap(
    ~description,
    scales = "free",
    strip.position = "right",
    ncol = 1
  )
# Restrict the tsibble to the hydroelectric consumption series.
hydro_ts <- filter(
  renew_ts,
  description == "hydroelectric power consumption"
)
# Time-series and subseries plots for hydroelectric consumption only.
autoplot(hydro_ts, value)
gg_subseries(hydro_ts, value)
# Hand-made season plot for hydro: one line per year across the months.
hydro_ts %>%
  ggplot(mapping = aes(x = month, y = value, group = year)) +
  geom_line(mapping = aes(color = year)) +
  facet_wrap(
    ~description,
    scales = "free",
    strip.position = "right",
    ncol = 1
  )
# Average hydro consumption per quarter. index_by() regroups the time index
# into year-quarters, and summarize() then takes the mean within each one.
hydro_quarterly <- summarize(
  index_by(hydro_ts, year_qu = ~ yearquarter(.)),
  avg_consumption = mean(value)
)
# Preview the first six quarterly averages (console output recorded below).
head(hydro_quarterly, n = 6L)
## # A tsibble: 6 x 2 [1Q]
## year_qu avg_consumption
## <qtr> <dbl>
## 1 1973 Q1 261.
## 2 1973 Q2 255.
## 3 1973 Q3 212.
## 4 1973 Q4 225.
## 5 1974 Q1 292.
## 6 1974 Q2 290.
# STL decomposition of hydro consumption. `window = 5` is the size of the
# window used when smoothing the seasonal component.
dcmp <- model(hydro_ts, STL(value ~ season(window = 5)))
# Plot the decomposed components of the series.
autoplot(components(dcmp))
# Histogram of the remainder component, to eyeball whether it looks normal.
hist(components(dcmp)$remainder)
# Now look at the autocorrelation function (ACF):
# Plot the autocorrelation of hydro consumption at each lag.
# Reading the plot: observations 12 months apart are more strongly
# correlated than observations at any other distance.
autoplot(ACF(hydro_ts, value))
# Fit an ARIMA and an ETS model to hydro consumption and forecast four years
# ahead with each. Note that hydro_model holds the forecasts, not the fitted
# models.
hydro_model <- fabletools::forecast(
  model(hydro_ts, ARIMA(value), ETS(value)),
  h = "4 years"
)
# Plot the forecasts against the observed data after 2010; year() extracts
# the year part of month_sep.
autoplot(hydro_model, filter(hydro_ts, year(month_sep) > 2010))
# Read the simplified world-borders layer from the data folder and show it
# in an interactive map.
world <- read_sf(
  layer = "TM_WORLD_BORDERS_SIMPL-0.3",
  dsn = here("data", "TM_WORLD_BORDERS_SIMPL-0.3-1")
)
mapview(world)